Analyses of swisscom data

API data

Data

Grid

Testing on several PLZs of Bern & surroundings.

# Tile grid for the Bern test postcodes, one row per tile.
bern_plz <- local({
  grid <- read_rds("data/grid/bern_plz.Rds")

  # A tile id can occur more than once in the grid file; only its first row
  # is kept, which is enough for the visualizations.
  # Revisit this if the PLZ of a tile is needed more precisely!
  grid <- distinct(grid, tileId, .keep_all = TRUE)

  # Use the same key name as the `tile_id` column of the dwell-density data.
  rename(grid, tile_id = tileId)
})

# Centroid of every tile geometry.
bern_plz_ct <- bern_plz %>%
  st_centroid()
x <character> 
# total N=6232 valid N=6232 mean=3043.83 sd=37.25

Value |    N | Raw % | Valid % | Cum. %
---------------------------------------
 3005 |  198 |  3.18 |    3.18 |   3.18
 3006 |  583 |  9.35 |    9.35 |  12.53
 3007 |  227 |  3.64 |    3.64 |  16.17
 3008 |  406 |  6.51 |    6.51 |  22.69
 3010 |    6 |  0.10 |    0.10 |  22.79
 3011 |   82 |  1.32 |    1.32 |  24.10
 3012 |  535 |  8.58 |    8.58 |  32.69
 3013 |  130 |  2.09 |    2.09 |  34.77
 3014 |  291 |  4.67 |    4.67 |  39.44
 3018 |  562 |  9.02 |    9.02 |  48.46
 3027 |  655 | 10.51 |   10.51 |  58.97
 3073 |  509 |  8.17 |    8.17 |  67.14
 3074 |  327 |  5.25 |    5.25 |  72.38
 3084 |  473 |  7.59 |    7.59 |  79.97
 3095 |  122 |  1.96 |    1.96 |  81.93
 3097 |  123 |  1.97 |    1.97 |  83.91
 3098 | 1003 | 16.09 |   16.09 | 100.00
 <NA> |    0 |  0.00 |    <NA> |   <NA>

Dwell density

Data are from http://mip.swisscom.ch, which Swisscom describes as:

Our new API platform offering 3 endpoints focusing on density, dwell times and origin destination

Important note: free data is limited to 2020-01-27 only!

We are using the Heatmaps API to retrieve daily and hourly dwell times for one postcode. The code to retrieve the data, kindly provided by Yann Steimer from Swisscom, is in example_notebook_SC_heatmaps_API_UNIBE.ipynb.

Daily dwell density

# Read one daily dwell-density export and tag every row with its postcode.
#
# `filename` is the path to a semicolon-delimited file named `<PLZ>_day.csv`.
# Returns a tibble holding the `tile_id`, `time` and `score` columns of that
# file plus `plz`, the file name without its directory and without the
# `_day.csv` suffix.
read_fun <- function(filename) {
  raw <- readr::read_delim(
    filename,
    delim = ";", escape_double = FALSE, trim_ws = TRUE,
    show_col_types = FALSE
  )

  # Fully qualified calls (no pipe) so the function does not depend on which
  # packages happen to be attached where it is executed.
  data <- dplyr::as_tibble(dplyr::select(raw, tile_id, time, score))

  # Take the postcode from the file name alone, so the result does not depend
  # on the directory the file lives in. The suffix pattern is anchored and
  # its dot escaped, so only a literal trailing "_day.csv" is removed.
  data$plz <- sub("_day[.]csv$", "", basename(filename))

  data
}

# Route foreach-based parallelism (used by plyr's `.parallel = TRUE`) through
# the future framework.
doFuture::registerDoFuture()

# Run futures in background R sessions. Use at most 8 workers, but never more
# than the number of cores available to this R process, so that smaller
# machines and containers are not oversubscribed.
future::plan("multisession", workers = min(8L, future::availableCores()))

# Daily dwell density: read every `<PLZ>_day.csv` export and stack the results.
data_day <- local({
  day_files <- fs::dir_ls("data/swisscom/", regexp = "[0-9]_day[.]csv$")

  stacked <- plyr::ldply(
    .data = day_files,
    .fun = read_fun,
    .id = NULL,
    .parallel = TRUE
  )

  # Keep the first row of each tile/time combination; a combination that
  # shows up in more than one file keeps the values of the first occurrence.
  distinct(as_tibble(stacked), tile_id, time, .keep_all = TRUE)
})

Hourly dwell density

# Read one hourly dwell-density export and tag every row with its postcode.
#
# `filename` is the path to a semicolon-delimited file named `<PLZ>_hour.csv`.
# Returns a tibble holding the `tile_id`, `time` and `score` columns of that
# file plus `plz`, the file name without its directory and without the
# `_hour.csv` suffix.
read_fun <- function(filename) {
  raw <- readr::read_delim(
    filename,
    delim = ";", escape_double = FALSE, trim_ws = TRUE,
    show_col_types = FALSE
  )

  # Fully qualified calls (no pipe) so the function does not depend on which
  # packages happen to be attached where it is executed.
  data <- dplyr::as_tibble(dplyr::select(raw, tile_id, time, score))

  # Take the postcode from the file name alone, so the result does not depend
  # on the directory the file lives in. The suffix pattern is anchored and
  # its dot escaped, so only a literal trailing "_hour.csv" is removed.
  data$plz <- sub("_hour[.]csv$", "", basename(filename))

  data
}

# Hourly dwell density: read every `<PLZ>_hour.csv` export and stack the
# results.
data_hour <- local({
  hour_files <- fs::dir_ls("data/swisscom/", regexp = "[0-9]_hour[.]csv$")

  stacked <- plyr::ldply(
    .data = hour_files,
    .fun = read_fun,
    .id = NULL,
    .parallel = TRUE
  )

  # Keep the first row of each tile/time combination; a combination that
  # shows up in more than one file keeps the values of the first occurrence.
  distinct(as_tibble(stacked), tile_id, time, .keep_all = TRUE)
})

EDA

Daily

# Attach the daily scores to the tile grid; the `time` column is dropped
# before joining.
# NOTE(review): no `by` is given, so the join uses every column name the two
# tables share (presumably just `tile_id`) - confirm and consider making the
# key explicit.
bern_plz_day <- left_join(
  bern_plz,
  select(data_day, -time)
)

Hourly

# Attach the hourly scores to the tile grid; `time` is kept, so a tile gets
# one row per matching record in `data_hour`.
# NOTE(review): no `by` is given, so the join uses every column name the two
# tables share (presumably just `tile_id`) - confirm and consider making the
# key explicit.
bern_plz_hour <- left_join(
  bern_plz,
  data_hour
)